In [1]:
from langchain.document_loaders import UnstructuredFileLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.chains.question_answering import load_qa_chain

In [None]:
# Extract the bundled data archive into the parent directory

import zipfile
with zipfile.ZipFile('../../data.zip', mode='r') as data_archive:
    data_archive.extractall(path='..')

### Load Documents

In [2]:
# Create one loader per file, then read each file into a list of documents.
sm_loader = UnstructuredFileLoader("../data/muir_lake_tahoe_in_winter.txt")
lg_loader = UnstructuredFileLoader("../data/PaulGrahamEssays/worked.txt")

sm_doc = sm_loader.load()  # small document
lg_doc = lg_loader.load()  # large document

In [3]:
def doc_summary(docs):
    """Print a quick size report and a short preview for a list of documents.

    Args:
        docs: Sequence of objects exposing a ``page_content`` string.

    Prints the number of documents, a rough word count, and the text of the
    first document up to (not including) its first ". ". An empty ``docs``
    prints zero counts and an empty preview instead of raising ``IndexError``.
    """
    print(f'You have {len(docs)} document(s)')

    # Rough count only: text is split on single spaces, so words separated by
    # newlines or repeated spaces are not counted exactly.
    num_words = sum(len(doc.page_content.split(' ')) for doc in docs)

    print(f'You have roughly {num_words} words in your docs')
    print()

    # Guard the preview: indexing docs[0] on an empty list would raise.
    preview = docs[0].page_content.split(". ")[0] if docs else ''
    print(f'Preview: \n{preview}')

In [4]:
# Size report and preview for the small document
doc_summary(sm_doc)

You have 1 document(s)
You have roughly 2298 words in your docs

Preview: 
The winter glory of the Sierra ! How little is known of it! Californians admire descriptions of the Swiss Alps, reading with breathless interest how ice and snow load their sublime heights, and booming avalanches sweep in glorious array through their crowded forests, while our own icy, snow-laden mountains, with their unrivaled forests, loom unnoticed along our eastern horizon


In [5]:
# Size report and preview for the large document
doc_summary(lg_doc)

You have 1 document(s)
You have roughly 12551 words in your docs

Preview: 
February 2021Before college the two main things I worked on, outside of school,

were writing and programming


### Load Your LLM

In [6]:
from langchain import OpenAI

In [7]:
import os

# Prefer the key from the environment so a real secret never has to be saved
# in the notebook; the '...' placeholder is kept only as a fallback.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '...')

In [8]:
# LLM object passed to every chain below, configured with OPENAI_API_KEY
llm = OpenAI(openai_api_key=OPENAI_API_KEY)

### Summarize: Stuff

In [9]:
# "stuff" summarization chain; verbose=True echoes each formatted prompt in the output
chain = load_summarize_chain(llm, chain_type="stuff", verbose=True)

In [10]:
# Summarize the small document with the "stuff" chain
chain.run(sm_doc)



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"The winter glory of the Sierra ! How little is known of it! Californians admire descriptions of the Swiss Alps, reading with breathless interest how ice and snow load their sublime heights, and booming avalanches sweep in glorious array through their crowded forests, while our own icy, snow-laden mountains, with their unrivaled forests, loom unnoticed along our eastern horizon. True, only mountaineers may penetrate their snow-blocked fastnesses to behold them in all their white wild grandeur, but to every healthy man and woman, and even to children, many of the subalpine valleys and lake-basins, six or seven thousand feet above the sea, remain invitingly open and approachable all winter. With a friend and his two little sons I have just returned from a week of bracing weathering around Lake Tahoe, in which we


[1m> Finished chain.[0m

[1m> Finished chain.[0m


" This article is a description of a winter trip to Lake Tahoe, California. It highlights the mild winter weather and snow-covered mountains, as well as the abundance of glacier lakes in the Sierra region. The author also mentions the local lumber companies and their destructive effects on the lake's forests, as well as the activities of the few remaining winter dwellers. The article concludes with a description of a snowshoeing excursion down the Truckee canon to the railroad."

In [11]:
# The large document is expected to exceed the model's context window when it
# is sent through the "stuff" chain. That failure is the point of this cell,
# so catch and print it instead of letting the exception stop a
# "Restart & Run All".
try:
    lg_summary = chain.run(lg_doc)
except Exception as err:  # broad on purpose: the API error class is not imported in this notebook
    print(f"{type(err).__name__}: {err}")
else:
    print(lg_summary)



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"February 2021Before college the two main things I worked on, outside of school,

were writing and programming. I didn't write essays. I wrote what

beginning writers were supposed to write then, and probably still

are: short stories. My stories were awful. They had hardly any plot,

just characters with strong feelings, which I imagined made them

deep.The first programs I tried writing were on the IBM 1401 that our

school district used for what was then called "data processing."

This was in 9th grade, so I was 13 or 14. The school district's

1401 happened to be in the basement of our junior high school, and

my friend Rich Draves and I got permission to use it. It was like

a mini Bond villain's lair down there, with all these alien-looking

machines  CPU, disk drives, printer, card reader  sitting up


InvalidRequestError: This model's maximum context length is 4097 tokens, however you requested 19068 tokens (18812 in your prompt; 256 for the completion). Please reduce your prompt; or completion length.

### Summarize: Map Reduce

In [12]:
# "map_reduce" summarization chain: the verbose output below shows a summary
# prompt for the document, then a second prompt that summarizes that summary
chain = load_summarize_chain(llm, chain_type="map_reduce", verbose=True)

In [13]:
# Summarize the small document with the "map_reduce" chain
chain.run(sm_doc)



[1m> Entering new MapReduceDocumentsChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"The winter glory of the Sierra ! How little is known of it! Californians admire descriptions of the Swiss Alps, reading with breathless interest how ice and snow load their sublime heights, and booming avalanches sweep in glorious array through their crowded forests, while our own icy, snow-laden mountains, with their unrivaled forests, loom unnoticed along our eastern horizon. True, only mountaineers may penetrate their snow-blocked fastnesses to behold them in all their white wild grandeur, but to every healthy man and woman, and even to children, many of the subalpine valleys and lake-basins, six or seven thousand feet above the sea, remain invitingly open and approachable all winter. With a friend and his two little sons I have just returned from a week of bracing weathering around Lake Tahoe, in which we enjoyed glorious views of winter, fine



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


" This article is an account of a winter excursion to Lake Tahoe in California, where the snowfall this season has been recorded at 22 feet and 4 inches. Despite the deep snow around the lake, the mild winter has kept temperatures above freezing. The post-glacial changes to the lake are minimal and the surrounding forests are still mostly intact, though logging has begun in earnest, with a contract for 96,000,000 feet of lumber and 60,000 cords of wood this season. The author recounts their adventures, including rowing, bathing, snow shoeing, and visits to old cabins. They ended the trip by snowshoeing down the Truckee canon to the railroad."


CONCISE SUMMARY:[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


' This article is an account of a winter excursion to Lake Tahoe in California. Despite the deep snow, the temperatures have remained above freezing. Logging has begun in the surrounding forests, but the post-glacial changes to the lake are minimal. The author recounts their adventures, including rowing, bathing, snow shoeing, and visits to old cabins. They ended the trip by snowshoeing down the Truckee canon to the railroad.'

In [14]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [15]:
# Deliberately tiny chunk size (for demonstration only), with no overlap between chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=0)

In [16]:
# Split the large document into many small chunk documents
lg_docs = text_splitter.split_documents(lg_doc)

In [17]:
# Size report after splitting: document count and rough word count of the chunks
doc_summary(lg_docs)

You have 201 document(s)
You have roughly 12751 words in your docs

Preview: 
February 2021Before college the two main things I worked on, outside of school,

were writing and programming


In [18]:
# Summarize only the first five chunks with the "map_reduce" chain
chain.run(lg_docs[:5])



[1m> Entering new MapReduceDocumentsChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"February 2021Before college the two main things I worked on, outside of school,

were writing and programming. I didn't write essays. I wrote what

beginning writers were supposed to write then, and probably still

are: short stories. My stories were awful. They had hardly any plot,

just characters with strong feelings, which I imagined made them"


CONCISE SUMMARY:[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"deep.The first programs I tried writing were on the IBM 1401 that our

school district used for what was then called "data processing."

This was in 9th grade, so I was 13 or 14. The school district's

1401 happened to be in the basement of our junior high school, and

my friend Rich Draves and I got permission to use it. It was like

a mini Bond villain's lair down there, with all these alien-looki

" The speaker recounts their experience of programming with the IBM 1401 in their junior high school basement, which was their first time working with computers. They describe the environment as similar to a mini Bond villain's lair, and detail their attempts to program in Fortran and approximate pi with math. They also recall the moment they learned that programs can not terminate, and their surprise when one of their programs didn't."

### Summarize: Refine

In [19]:
# "refine" summarization chain: the verbose output below shows a summary of the
# first chunk, then a prompt asking to refine that existing summary
chain = load_summarize_chain(llm, chain_type="refine", verbose=True)

In [20]:
# Summarize only the first five chunks with the "refine" chain
chain.run(lg_docs[:5])



[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"February 2021Before college the two main things I worked on, outside of school,

were writing and programming. I didn't write essays. I wrote what

beginning writers were supposed to write then, and probably still

are: short stories. My stories were awful. They had hardly any plot,

just characters with strong feelings, which I imagined made them"


CONCISE SUMMARY:[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYour job is to produce a final summary
We have provided an existing summary up to a certain point:  Prior to college, the author practiced writing and programming in their spare time. The author wrote short stories that lacked plot, but the characters did have strong emotions.
We have the opportunity to refine the existing summary(only

"\n\nPrior to college, the author practiced writing and programming in their spare time. The author wrote short stories that lacked plot, but the characters did have strong emotions. In 9th grade, the author and a friend got permission to use an IBM 1401 in the basement of their junior high school for data processing. This experience exposed the author to programming and helped to develop their skills, using an early version of Fortran on machines such as CPU, disk drives, printer, card reader, and printing the results on the printer. The author was initially puzzled by the 1401, as the only form of input to programs was data stored on punched cards, and they had no data stored on punched cards. As a result, the author had to do things that didn't rely on any input, like calculate approximations of pi, but they didn't know enough math to do anything interesting of that type. Despite this, they were able to learn that programs could fail to terminate, much to their surprise."

### Q&A: Map Re-Rank

In [21]:
# "map_rerank" Q&A chain: per the prompt shown below, each answer comes with a
# 0-100 score; return_intermediate_steps=True keeps the per-document answers
chain = load_qa_chain(llm, chain_type="map_rerank", verbose=True, return_intermediate_steps=True)

In [22]:
# Question to ask against the first five chunks of the large document
query = "Who was the authors friend who he got permission from to use the IBM 1401?"

qa_inputs = {"input_documents": lg_docs[:5], "question": query}
result = chain(qa_inputs, return_only_outputs=True)



[1m> Entering new MapRerankDocumentsChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

In addition to giving an answer, also return a score of how fully it answered the user's question. This should be in the following format:

Question: [question here]
Helpful Answer: [answer here]
Score: [score between 0 and 100]

How to determine the score:
- Higher is a better answer
- Better responds fully to the asked question, with sufficient level of detail
- If you do not know the answer based on the context, that should be a score of 0
- Don't be overconfident!

Example #1

Context:
---------
Apples are red
---------
Question: what color are apples?
Helpful Answer: red
Score: 100

Example #2

Context:
---------
it was night and the witness forgot his glasses. he was not sure if it was a sports car or an suv
---------
Questi


[1m> Finished chain.[0m


In [23]:
# Final answer text returned by the chain
result['output_text']

' Rich Draves'

In [24]:
# Per-document answers with their scores
result['intermediate_steps']

[{'answer': ' This document does not answer the question', 'score': '0'},
 {'answer': ' Rich Draves', 'score': '100'},
 {'answer': ' This document does not answer the question.', 'score': '0'},
 {'answer': ' This document does not answer the question.', 'score': '0'},
 {'answer': ' This document does not answer the question', 'score': '0'}]